import csv

# Keywords that disqualify a news item when found anywhere in its title.
SENSITIVE_WORDS = {
    '暴力',  # violence
    '色情',  # pornography
    '赌博',  # gambling
    '诈骗',  # fraud
    '毒品',  # drugs
}


def clean_data(news_list, output_path='news.csv', sensitive_words=None):
    """Filter, de-duplicate and persist a list of news items.

    An item is kept only when all of the following hold:

    * both its ``'title'`` and ``'link'`` values are present and truthy;
    * its title contains none of the sensitive words;
    * no earlier kept item has the same title (first occurrence wins).

    The kept items are written to a UTF-8 CSV file with the columns
    ``title`` and ``link``; any other keys on an item are left out of the
    file but remain on the returned dicts.

    Args:
        news_list: Iterable of dicts describing news items.
        output_path: Destination of the CSV file. The file is overwritten.
        sensitive_words: Iterable of substrings that disqualify a title.
            Defaults to the module-level ``SENSITIVE_WORDS``.

    Returns:
        A list with the kept items (the same dict objects, in input order).
    """
    if sensitive_words is None:
        # Resolved at call time so later changes to the module constant
        # are still honoured.
        sensitive_words = SENSITIVE_WORDS

    seen_titles = set()
    unique_news = []
    for news in news_list:
        title = news.get('title')
        link = news.get('link')

        # Drop items with a missing or empty title/link.
        if not title or not link:
            continue

        # Drop items whose title contains a sensitive word.
        if any(word in title for word in sensitive_words):
            continue

        # Drop duplicates, keeping the first item seen for each title.
        if title in seen_titles:
            continue

        seen_titles.add(title)
        unique_news.append(news)

    # Save as CSV. extrasaction='ignore' stops DictWriter from raising
    # ValueError when an item carries keys other than title/link.
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(
            f, fieldnames=['title', 'link'], extrasaction='ignore'
        )
        writer.writeheader()
        writer.writerows(unique_news)

    return unique_news